#! /usr/bin/env python
# coding=utf-8
import concurrent.futures
import os
import sys
import threading

import vmmap
import footprint
import vm_stat

# Per-process keys summed into the system-wide summary: three fixed keys
# followed by the vmmap and footprint size keys with their module prefix.
PROCESS_SIZE_KEYS = (
    ("threads", "rss", "vmmap_regions")
    + tuple("vmmap_" + key for key in vmmap.VMMAP_SIZE_KEYS)
    + tuple("footprint_" + key for key in footprint.FOOTPRINT_SIZE_KEYS)
)

def command(command, *args):
    """Run a shell command and return its non-empty output lines.

    The command and its arguments are joined with single spaces and run
    through ``os.popen``. If the command exits with a non-zero status the
    failure and the captured output are printed and the interpreter exits
    with status 1.
    """
    arg_text = ' '.join(args)
    stream = os.popen(command + ' ' + arg_text, 'r')
    text = stream.read().strip()
    # close() yields None on success, otherwise the wait status of the child
    wait_status = stream.close()
    if wait_status is not None:
        exit_code = os.WEXITSTATUS(wait_status)
        if exit_code != 0:
            print("Command failed with status", exit_code, ":", command, arg_text)
            print("With output:", text)
            sys.exit(1)
    return list(filter(None, text.split('\n')))

class ProcessMgr(object):
    """Collect, store and reload per-process memory information.

    Raw tool output is kept in a "backlog" directory: ``ps.txt`` and
    ``vm_stat.txt`` for system-wide data, ``<pid>.footprint`` and
    ``<pid>.vmmap`` for each process, and XML reports under ``formatted/``.
    When the directory already contains ``ps.txt`` the stored data is parsed
    while the manager is constructed.
    """

    def __init__(self, backlog_dir=None):
        """Create the manager and load an existing backlog if one is present.

        Args:
            backlog_dir: Directory holding the collected files. When falsy, a
                ``backlog_<YYYYmmdd_HHMM>`` directory next to this source file
                is used. The directory is created if it does not exist.
        """
        if backlog_dir:
            self.__backlog_dir = backlog_dir
        else:
            from datetime import datetime
            stamp = datetime.now().strftime("%Y%m%d_%H%M")
            self.__backlog_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                              "backlog_" + stamp)

        self.__categories = {}
        self.__processes = []
        # Created up front so get_meminfo() works even when no backlog is loaded.
        self.__meminfo = {}
        self.__region_types = []

        if not os.path.exists(self.__backlog_dir):
            os.makedirs(self.__backlog_dir)

        if os.path.exists(self.__get_output_filename("ps")):
            self.__load_meminfo()

    def print_types(self):
        """Print every footprint category with its accumulated region count."""
        for k, v in self.__categories.items():
            print("%s -> %d" % (k, v))

    @staticmethod
    def __merge_footprint(proc, fp, mgr=None):
        """Copy the summary of a parsed footprint into the process dict.

        Args:
            proc: Process dict to update in place.
            fp: Parsed footprint object providing ``getSum()`` and ``get_all()``.
            mgr: Optional manager; when given, the ``"Regions"`` value of each
                category is added to the manager-wide per-category totals.
        """
        for k, v in fp.getSum().items():
            proc[k] = v
        categories = fp.get_all()
        if mgr is not None:
            for cat in categories:
                name = cat["Category"]
                mgr.__categories[name] = mgr.__categories.get(name, 0) + cat["Regions"]
        proc["categories"] = len(categories)

    @staticmethod
    def __merge_vmmap(proc, vm, mgr=None):
        """Copy the summary of a parsed vmmap into the process dict.

        ``mgr`` is accepted for symmetry with ``__merge_footprint`` and is unused.
        """
        for k, v in vm.getSum().items():
            proc[k] = v
        proc["vmmap_regions"] = len(vm.get_all_regions())

    def __load_meminfo(self):
        """Rebuild processes, regions and the summary from the backlog files."""
        with open(self.__get_output_filename("ps"), "r") as f:
            self.__parse_ps(f.readlines())

        st = vm_stat.MachSystemVmStat(self.__get_output_filename("vm_stat"))
        self.__meminfo = {k: v for k, v in st.items()}
        self.__region_types = []

        for proc in self.__processes:
            f_name = self.__get_output_filename("footprint", proc["pid"])
            ProcessMgr.__merge_footprint(proc, footprint.ProcessFootprint(f_name), self)

            f_name = self.__get_output_filename("vmmap", proc["pid"])
            p = vmmap.ProcessVmmap(f_name)
            ProcessMgr.__merge_vmmap(proc, p, self)

            regions = p.get_all_regions()
            region_types = p.get_region_types()
            print("Parsing vmmap file %s got %d regions with %d types." % (f_name, len(regions), len(region_types)))

            # Link regions and their owning process in both directions.
            proc["__regions"] = []
            for obj in regions:
                obj["__process"] = proc
                proc["__regions"].append(obj)

            proc["region_types"] = len(region_types)
            proc["__region_types"] = region_types
            for obj in region_types:
                if obj["name"] not in self.__region_types:
                    self.__region_types.append(obj["name"])
                obj["__process"] = proc

        self.__do_memory_summary()

    def __do_memory_summary(self):
        """Add process/region-type counts and per-key totals to the summary."""
        procs = self.get_all()
        self.__meminfo["processes"] = len(procs)
        self.__meminfo["region_types"] = len(self.__region_types)
        for k in PROCESS_SIZE_KEYS:
            # A process missing a key contributes 0 rather than aborting the
            # whole summary with a KeyError.
            self.__meminfo[k] = sum(item.get(k, 0) for item in procs)

    def get_meminfo(self):
        """Return the summary dict (empty until a backlog has been loaded)."""
        return self.__meminfo

    def get_all(self):
        """Return the list of process dicts currently held by the manager."""
        return self.__processes

    def report(self, fileName):
        """Write all processes to ``fileName`` as a simple XML document.

        Keys starting with ``"__"`` are skipped. Values are XML-escaped so
        that commands containing ``&``, ``<`` or ``>`` keep the file
        well-formed.
        """
        from xml.sax.saxutils import escape

        with open(fileName, "w") as f:
            f.write("<processes>\n")
            for obj in self.__processes:
                f.write("<process>\n")
                for k, v in obj.items():
                    if k.startswith("__"):
                        continue
                    f.write("<%s>%s</%s>\n" % (k, escape(str(v)), k))
                f.write("</process>\n")
            f.write("</processes>\n")

    def get_object_by_name(self, name):
        """Return the entry of ``_objects_dict`` stored under ``name``, or None.

        NOTE(review): nothing in this class creates ``_objects_dict``; the
        lookup therefore returns None unless a subclass or caller provides
        the attribute. Confirm whether this method is still needed.
        """
        return getattr(self, "_objects_dict", {}).get(name)

    def __parse_thread(self, line):
        """Parse one thread line of the ``ps -M`` output into a dict.

        The whitespace-separated fields 1, 2, 3 and 7 are stored as ``cpu``,
        ``state``, ``priority`` and ``ni`` (all kept as strings).
        """
        parts = line.strip().split()
        return {
            "cpu": parts[1],
            "state": parts[2],
            "priority": parts[3],
            "ni": parts[7],
        }

    @staticmethod
    def __get_leading_part(line, width=0):
        """Split ``line`` into a stripped ``(head, rest)`` pair.

        With ``width == 0`` the split happens at the first space and None is
        returned when there is no space or the line starts with one.
        Otherwise the split happens at the fixed column ``width``. The
        character at the split position is dropped.
        """
        if width == 0:
            pos = line.find(" ")
            if pos <= 0:
                return None
        else:
            pos = width
        return (line[:pos].strip(), line[pos + 1:].strip())

    def __parse_process(self, line, pid):
        """Parse one process line of the ``ps`` output into a process dict.

        Args:
            line: A process line (not starting with a space).
            pid: Pid to exclude; lines whose pid or ppid equals it yield None.

        Returns:
            The process dict, or None when the line is excluded.
        """
        take = ProcessMgr.__get_leading_part
        proc = {"__threads": [], "threads": 1}

        # Skip user
        parts = take(line)

        # Get pid
        parts = take(parts[1])
        proc["pid"] = int(parts[0].strip())
        if proc["pid"] == pid:
            return None

        # Skip TT
        parts = take(parts[1])

        # Get cpu, state and priority
        for key in ("cpu", "state", "priority"):
            parts = take(parts[1])
            proc[key] = parts[0].strip()

        # Skip STIME
        parts = take(parts[1])
        # Skip UTIME
        parts = take(parts[1])
        # Skip First COMMAND.
        # NOTE(review): the split is at a fixed column (10); confirm this
        # matches the width of the COMMAND column ps actually prints.
        parts = take(parts[1], 10)

        fields = parts[1].strip().split()
        proc["ppid"] = int(fields[0].strip())
        if proc["ppid"] == pid:
            return None
        proc["nice"] = int(fields[1].strip())
        proc["rss"] = int(fields[2].strip())
        proc["vsize"] = int(fields[3].strip())
        try:
            proc["wq"] = int(fields[4].strip())
        except (ValueError, IndexError):
            # Non-numeric or missing wq column
            proc["wq"] = 0
        proc["command"] = " ".join(fields[5:])
        return proc

    def __parse_ps(self, lines, f=None):
        """Parse ``ps`` output lines and append the processes found.

        The first line is treated as the header. Lines starting with a space
        are thread lines of the preceding process. The current process
        (``os.getpid()``) and its direct children are skipped together with
        their thread lines.

        Args:
            lines: Iterable of output lines.
            f: Optional writable file; the header and every accepted line are
                echoed to it, each followed by a newline.
        """
        own_pid = os.getpid()
        last_proc = None
        header_seen = False
        for line in lines:
            if not header_seen:
                header_seen = True
                if f:
                    f.write(line + "\n")
                continue
            if not line.strip():
                # Blank lines carry no data and would break the field parser.
                continue
            if line.startswith(" "):
                if last_proc:
                    last_proc["__threads"].append(self.__parse_thread(line))
                    last_proc["threads"] = last_proc["threads"] + 1
                    if f:
                        f.write(line + "\n")
                continue
            last_proc = self.__parse_process(line, own_pid)
            if not last_proc:
                continue
            if f:
                f.write(line + "\n")
            self.__processes.append(last_proc)

    def __create_formatted_output_dir(self):
        """Create the ``formatted`` sub-directory of the backlog if missing."""
        formatted_dir = os.path.join(self.__backlog_dir, "formatted")
        if not os.path.exists(formatted_dir):
            os.makedirs(formatted_dir)

    def __get_output_filename(self, kind, pid=None, formatted=False):
        """Return the backlog path for the given kind of data.

        Args:
            kind: ``"ps"`` or ``"vm_stat"`` for system-wide files, anything
                else (e.g. ``"footprint"``, ``"vmmap"``) for per-process files.
            pid: Process id for per-process files; an int or a numeric string.
            formatted: When true, return the XML path under ``formatted/``.
        """
        if kind in ("ps", "vm_stat"):
            if formatted:
                return os.path.join(self.__backlog_dir, "formatted", "%s.xml" % kind)
            return os.path.join(self.__backlog_dir, "%s.txt" % kind)
        pid = int(pid)
        if formatted:
            return os.path.join(self.__backlog_dir, "formatted", "%d_%s.xml" % (pid, kind))
        return os.path.join(self.__backlog_dir, "%d.%s" % (pid, kind))

    @staticmethod
    def __run_to_file(argv, file_name):
        """Run ``argv`` without a shell and write its stdout to ``file_name``.

        The exit status is ignored. If the program cannot be started a
        message is printed and the (empty) output file is left in place.
        """
        import subprocess

        with open(file_name, "wb") as out:
            try:
                subprocess.run(argv, stdout=out, check=False)
            except OSError as err:
                print("Failed to run %s: %s" % (" ".join(argv), err))

    def collectOneProcess(self, pid, args=None, proc=None):
        """Collect footprint and vmmap information for one process.

        Args:
            pid: Process id; an int or a numeric string.
            args: Optional options object with the boolean attributes
                ``no_footprint``, ``no_vmmap`` and ``no_format``. When falsy,
                everything is collected and formatted.
            proc: Optional process dict that receives the parsed summaries.
        """
        # The pid may arrive as a string from the command line.
        pid = int(pid)

        if not args or not args.no_footprint:
            # Collect footprint information
            file_name = self.__get_output_filename("footprint", pid)
            ProcessMgr.__run_to_file(["footprint", str(pid)], file_name)

            p = footprint.ProcessFootprint(file_name)
            if proc:
                ProcessMgr.__merge_footprint(proc, p)

            # Generate formatted footprint information
            if not args or not args.no_format:
                p.report(self.__get_output_filename("footprint", pid, True))

        if not args or not args.no_vmmap:
            # Collect vmmap information
            file_name = self.__get_output_filename("vmmap", pid)
            ProcessMgr.__run_to_file(["vmmap", str(pid)], file_name)

            p = vmmap.ProcessVmmap(file_name)
            if proc:
                ProcessMgr.__merge_vmmap(proc, p)

            # Generate formatted vmmap information
            if not args or not args.no_format:
                p.report(self.__get_output_filename("vmmap", pid, True))

    @staticmethod
    def __collect_task(proc, mgr, args):
        """Collect one process; only used by the disabled thread-pool code."""
        print("Collecting memory information for process %d ..." % proc["pid"])
        mgr.collectOneProcess(proc["pid"], args, proc)

    def collect(self, args=None):
        """Collect memory information into the backlog directory.

        When ``args.pid`` is set only that process is collected. Otherwise the
        process list and ``vm_stat`` output are stored and, unless both
        ``args.no_vmmap`` and ``args.no_footprint`` are set, every listed
        process is collected in turn.

        Args:
            args: Optional options object with the attributes ``pid``,
                ``no_format``, ``no_vmmap`` and ``no_footprint``.
        """
        # Create formatted output directory if needed
        if not args or not args.no_format:
            self.__create_formatted_output_dir()

        # Collect only one process
        if args and args.pid:
            self.collectOneProcess(args.pid, args)
            return

        # Get all processes by ps command line
        self.__processes = []
        lines = command("ps", "-AM", "-o", "ppid,nice,rss,vsz,wq,command")
        with open(self.__get_output_filename("ps"), "w") as f:
            self.__parse_ps(lines, f)

        # Generate formatted ps information
        if not args or not args.no_format:
            self.report(self.__get_output_filename("ps", formatted=True))

        # Collect vm_stat system memory information
        ProcessMgr.__run_to_file(["vm_stat"], self.__get_output_filename("vm_stat"))

        # generate formatted vm_stat information
        if not args or not args.no_format:
            formatted_stat = vm_stat.MachSystemVmStat(self.__get_output_filename("vm_stat"))
            formatted_stat.report(self.__get_output_filename("vm_stat", formatted=True))

        # No need to collect information for processes
        if args and args.no_vmmap and args.no_footprint:
            return

        all_cnt = len(self.__processes)
        for idx, proc in enumerate(self.__processes):
            print("[%d:%d]Collecting memory information for process %d ..." % (idx, all_cnt, proc["pid"]))
            self.collectOneProcess(proc["pid"], args, proc)

        #executor = concurrent.futures.ThreadPoolExecutor()
        #futures = [executor.submit(ProcessMgr.__collect_task, item, self, args) for item in self.__processes]
        #failCnt = 0
        #for future in concurrent.futures.as_completed(futures):
        #    result = future.result()
        #    if not result:
        #        failCnt = failCnt + 1
        #executor.shutdown()

if __name__ == "__main__":
    def createArgParser():
        """Build the command-line parser for the collector script."""
        import argparse
        parser = argparse.ArgumentParser(description='Collect memory information from macOS.')
        parser.add_argument('-o', '--output',
                            help='memory information output directory', required=True)

        parser.add_argument('-v', '--no-vmmap',
                            help='Donot collect detail vmmap information for process', required=False, default=False, action='store_true')

        parser.add_argument('-f', '--no-footprint',
                            help='Donot collect footprint information for process', required=False, default=False,
                            action='store_true')

        parser.add_argument('-t', '--no-format',
                            help='Donot generate formatted memory information', required=False, default=False,
                            action='store_true')

        # Parsed as int: the pid is formatted with %d when file names and
        # commands are built, which fails for a string value.
        parser.add_argument('-p', '--pid',
                            help='Update memory information', required=False, default=None, type=int)

        return parser

    parser = createArgParser()
    args = parser.parse_args()

    mgr = ProcessMgr(args.output)
    #mgr = ProcessMgr("/Users/handy/Documents/work/projects/python/macArchInfo/collector/vmmap/backlog_20230513_2340")
    mgr.collect(args)
    #mgr.report("processes.txt")
    #print(mgr.get_meminfo())
    #footprint_regions = 0
    #vmmap_regions = 0
    #for proc in mgr.get_all():
    #    footprint_regions = footprint_regions + proc["footprint_Regions"]
    #    vmmap_regions = vmmap_regions + proc["regions"]
